9. Tips & Tricks - Analysis workflow of protein expression
This is the tutorial compatible with the Tips&Tricks training held on 27th Nov., 2024.
1. Read in data
# the download link of the demo data is not yet available
# please write to biox_support_emea@bd.com to request the data
demo <- readRDS("demo_202411.rds")2. Normalization with CLR
DefaultAssay(demo) <- "ADT"
demo <- NormalizeData(demo, normalization.method = "CLR", margin = 2)3. QC - Outlier
# function to define outlier
is_outlier <- function(adata, nmads) {
# Extract the metric values from the dataset
M <- tibble(cell_label = colnames(adata),
log1p_adt = log1p(adata@meta.data$nCount_ADT))
# Calculate the median and MAD
median_val = median(M$log1p_adt)
mad_val = mad(M$log1p_adt)
# Determine outliers based on the number of MADs from the median
M <- mutate(M, outlier = (M$log1p_adt < median_val - nmads * mad_val) | (M$log1p_adt > median_val + nmads * mad_val))
outlier <- dplyr::filter(M, outlier == TRUE) %>%
.$cell_label
return(outlier)
}
# check which cells are outliers in each sample
sample1_outlier <- is_outlier(adata = subset(demo, subset = orig.ident == "09ABC"), 5)
sample2_outlier <- is_outlier(adata = subset(demo, subset = orig.ident == "IC-Day1"), 5)
demo_outlier <- c(sample1_outlier, sample1_outlier)# plot outlier on violin plot
plot_outlier <- FetchData(demo, vars = "nCount_ADT") %>%
rownames_to_column("cell_label") %>%
dplyr::filter(., cell_label %in% demo_outlier) %>%
mutate(ident = "09ABC")
vln_plot <- VlnPlot(demo, features = "nCount_ADT", pt.size = 0)
vln_plot +
scale_y_log10() +
geom_jitter(data = plot_outlier,
aes(x = ident, y = nCount_ADT),
color = "red",
size = 0.5,
width = 0.1,
fill = "red")# remove outlier
demo <- subset(demo, cells = demo_outlier, invert = T)3. QC - Doublets
# list all protein markers
rownames(demo) [1] "CCR7-protein" "CD11c-protein" "CD127-protein" "CD134-protein"
[5] "CD137-protein" "CD14-protein" "CD161-protein" "CD16-protein"
[9] "CD183-protein" "CD196-protein" "CD19-protein" "CD25-protein"
[13] "CD272-protein" "CD278-protein" "CD279-protein" "CD27-protein"
[17] "CD28-protein" "CD3-protein" "CD45RA-protein" "CD4-protein"
[21] "CD56-protein" "CD62L-protein" "CD8-protein" "CXCR5-protein"
[25] "CXCR6-protein" "GITR-protein" "HLA-protein" "IgD-protein"
[29] "IgM-protein" "Tim3-protein"
FeatureScatter(demo, feature1 = "CD3-protein", feature2 = "CD19-protein", slot = "counts", cols = c("black", "black")) +
scale_x_continuous(trans = trans_new("log1p", log1p, expm1)) +
scale_y_continuous(trans = trans_new("log1p", log1p, expm1)) +
ggtitle("log1p total counts") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8),
axis.text.y = element_text(size = 8)) +
coord_fixed(ratio = 1) +
geom_vline(xintercept = 300, linetype = "dashed", color = "red", size = 1) +
geom_hline(yintercept = 60, linetype = "dashed", color = "red", size = 1) FeatureScatter(demo, feature1 = "CD3-protein", feature2 = "CD14-protein", slot = "counts", cols = c("black", "black")) +
scale_x_continuous(trans = trans_new("log1p", log1p, expm1)) +
scale_y_continuous(trans = trans_new("log1p", log1p, expm1)) +
ggtitle("log1p total counts") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8),
axis.text.y = element_text(size = 8)) +
coord_fixed(ratio = 1) +
geom_vline(xintercept = 300, linetype = "dashed", color = "red", size = 1) +
geom_hline(yintercept = 100, linetype = "dashed", color = "red", size = 1) # Remove doublets
demo_filtered <- subset(demo, subset = `CD3-protein` > 300 & `CD19-protein` > 60 |
`CD3-protein` > 300 & `CD14-protein`> 100, slot = 'counts', invert = T)# plot again
Idents(demo_filtered) <- 'orig.ident'
FeatureScatter(demo_filtered, feature1 = "CD3-protein", feature2 = "CD19-protein", slot = "counts", cols = c("black", "black")) +
scale_x_continuous(trans = trans_new("log1p", log1p, expm1)) +
scale_y_continuous(trans = trans_new("log1p", log1p, expm1)) +
ggtitle("log1p total counts") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8),
axis.text.y = element_text(size = 8)) +
coord_fixed(ratio = 1) +
geom_vline(xintercept = 400, linetype = "dashed", color = "red", size = 1) +
geom_hline(yintercept = 100, linetype = "dashed", color = "red", size = 1) FeatureScatter(demo_filtered, feature1 = "CD3-protein", feature2 = "CD14-protein", slot = "counts", cols = c("black", "black")) +
scale_x_continuous(trans = trans_new("log1p", log1p, expm1)) +
scale_y_continuous(trans = trans_new("log1p", log1p, expm1)) +
ggtitle("log1p total counts") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8),
axis.text.y = element_text(size = 8)) +
coord_fixed(ratio = 1) +
geom_vline(xintercept = 400, linetype = "dashed", color = "red", size = 1) +
geom_hline(yintercept = 100, linetype = "dashed", color = "red", size = 1) 4. Dimentionality reduction
# elbow plot
ElbowPlot(demo_filtered, reduction = 'apca', ndims = 30)# visualize plots in batch and in protein expression
pal <- viridis::viridis(n = 10, option = "G", begin = 0, direction = -1)
p1 <- DimPlot(demo_filtered, group.by = "orig.ident") + ggtitle('Batch') + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
p2 <- FeaturePlot_scCustom(demo_filtered, features = 'CD14-protein', colors_use = pal) + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
p3 <- FeaturePlot_scCustom(demo_filtered, features = 'CD4-protein', colors_use = pal) + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
p4 <- FeaturePlot_scCustom(demo_filtered, features = 'CD8-protein', colors_use = pal) + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
grid.arrange(p1, p2, p3, p4, ncol = 2)5. Batch correction
# create a new object to store corrected values
demo_rpca <- demo_filtered
# step1: split the layers
demo_rpca[["ADT"]] <- split(demo_filtered[["ADT"]], f = demo_filtered$orig.ident)
# step2: integrate layers and asign results to new Seurat object
demo_rpca <- Seurat::IntegrateLayers(object = demo_rpca, method = 'RPCAIntegration',
orig.reduction = "apca",
new.reduction = "integrated.rpca",
verbose = FALSE,
features = rownames(demo_filtered),
dims = 1:29)
# step3: re-join layers after integration
demo_rpca[["ADT"]] <- JoinLayers(demo_rpca[["ADT"]])# calculate UMAP using the batch corrected values
demo_rpca <- RunUMAP(demo_rpca, reduction = 'integrated.rpca', dims = 1:29, reduction.name = "integrated.rpca.aumap")6. Clustering and annotation
# calculate clusters
demo_rpca <- FindNeighbors(demo_rpca, dims = 1:29, reduction = 'integrated.rpca', verbose = F)
demo_rpca <- FindClusters(demo_rpca, resolution = c(0.1, 0.2, 0.3, 0.5, 0.7, 0.9), verbose = F)# choose resolution 0.2 based on cluster tree
clustree::clustree(demo_rpca, prefix = "ADT_snn_res.")# plot heatmap with a selection of markers
DefaultAssay(demo_rpca) <- "ADT"
Idents(demo_rpca) <- 'ADT_snn_res.0.2'
DotPlot(demo_rpca, features = c('CD3-protein', 'CD4-protein', 'CD8-protein', 'CD45RA-protein', 'CD11c-protein', 'CD14-protein', 'CD16-protein', 'CD19-protein', 'CD161-protein', 'IgD-protein', 'CD56-protein', 'CD183-protein', 'CD27-protein'), cols = "RdYlBu") +
theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1))# plot heatmap with all markers
DotPlot(demo_rpca, features = rownames(demo_rpca), cols = "RdYlBu") +
theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1))# plot UMAP with clusters
DimPlot_scCustom(demo_rpca, group.by = "ADT_snn_res.0.2", reduction = 'integrated.rpca.aumap', label = T)# plot UMAP with cell type annotations
new.cluster.ids <- c("CD14 Monocyte", "Memory CD4T", "CD8 Teff", "Memory CD8T", "Naive CD4T", "NK", "Naive CD8T", "CD16 Monocyte", "Memore B", "CD8 TEM", "Naive B", "Mait", "DC")
names(new.cluster.ids) <- levels(demo_rpca)
demo_rpca <- RenameIdents(demo_rpca, new.cluster.ids)
DimPlot(demo_rpca, reduction = "integrated.rpca.aumap", label = TRUE, pt.size = 0.5) + NoLegend()7. WNN
# analysis workflow of RNA
# step1: split the layers ftom "both" Seurat object by Sample_Name
demo_rpca[["RNA"]] <- split(demo_rpca[["RNA"]], f = demo_rpca$orig.ident)
demo_rpca <- SCTransform(demo_rpca, vars.to.regress = "percent.mt", verbose = FALSE)
demo_rpca <- RunPCA(demo_rpca, verbose = FALSE)
demo_rpca <- RunUMAP(demo_rpca, verbose = FALSE, dims = 1:30)
# step2: integrate layers and asign results to new Seurat object
demo_rpca <- Seurat::IntegrateLayers(object = demo_rpca, method = 'HarmonyIntegration',
orig.reduction = "pca",
new.reduction = "integrated.harmony",
verbose = FALSE,
assay = "SCT")
# step3: re-join layers after integration
demo_rpca[["RNA"]] <- JoinLayers(demo_rpca[["RNA"]])# WNN to integrate RNA and protein data
demo_rpca <- FindMultiModalNeighbors(
demo_rpca, reduction.list = list("integrated.harmony", "integrated.rpca"),
dims.list = list(1:30, 1:29), modality.weight.name = "RNA.weight"
)
demo_rpca <- RunUMAP(demo_rpca, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_")